{
 "cells": [
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "from pathlib import Path\n",
    "from typing import Literal\n",
    "\n",
    "import histcite"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "#### 输入文件夹路径及数据源类型"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "folder_path = Path(\"/Users/.../Downloads/dataset\")\n",
    "output_path = folder_path / \"result\"\n",
    "source: Literal[\"wos\", \"cssci\", \"scopus\"] = \"cssci\""
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "#### 读取并处理文件"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "docs_df = histcite.ReadFile(folder_path, source).read_all()\n",
    "docs_df"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "process = histcite.ProcessFile(docs_df, source)\n",
    "refs_df = process.extract_reference()  # 提取参考文献\n",
    "citation_relation = process.process_citation(refs_df)  # 识别引用关系"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# 查看解析后的参考文献表\n",
    "refs_df"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# 查看引用关系表\n",
    "citation_relation"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "#### 导出描述性统计数据"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "cm = histcite.ComputeMetrics(docs_df, citation_relation, source)\n",
    "cm.write2excel(output_path / \"descriptive_statistics.xlsx\")"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "#### 导出引文网络图文件\n",
    "使用 [Graphviz在线编辑器](http://magjac.com/graphviz-visual-editor/) 或下载到本地的 [Graphviz工具](https://graphviz.org/) 生成引文网络图。 "
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "graph = histcite.GraphViz(docs_df, citation_relation, source)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# 选取LSC最高的100篇文献\n",
    "doc_id_list = (\n",
    "    citation_relation[citation_relation[\"LCS\"] > 0]\n",
    "    .sort_values(\"LCS\", ascending=False)\n",
    "    .index[:100]\n",
    "    .tolist()\n",
    ")\n",
    "graph_dot_file = graph.generate_dot_file(doc_id_list)\n",
    "print(graph_dot_file)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# 选取LSC大于等于5的文献\n",
    "doc_id_list = citation_relation[citation_relation[\"LCS\"] >= 5].index.tolist()\n",
    "graph_dot_file = graph.generate_dot_file(doc_id_list)\n",
    "print(graph_dot_file)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# 查看编号为10的文献的参考文献网络图，禁用时间线\n",
    "graph_dot_file = graph.generate_dot_file(10, edge_type=\"cited\", show_timeline=False)\n",
    "print(graph_dot_file)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# 查看编号为10的文献的引用文献网络图\n",
    "graph_dot_file = graph.generate_dot_file(10, edge_type=\"citing\")\n",
    "print(graph_dot_file)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# 查看编号为10的文献的引文网络图，包含参考文献和引用文献\n",
    "graph_dot_file = graph.generate_dot_file(10)\n",
    "print(graph_dot_file)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# 导出dot文件\n",
    "with open(output_path / \"graph.dot\", \"w\") as f:\n",
    "    f.write(graph_dot_file)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# 查看图节点信息\n",
    "graph_node_info = graph.generate_graph_node_info()\n",
    "graph_node_info"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# 导出图节点信息\n",
    "graph_node_info.to_excel(output_path / \"graph_node_info.xlsx\", index=False)"
   ]
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.11.6"
  },
  "orig_nbformat": 4
 },
 "nbformat": 4,
 "nbformat_minor": 2
}
